# -*- coding: utf-8 -*-
"""
Created on Thu Dec 03 10:32:18 2015

@author: liliangrong
"""

import re
import urllib
# Fetch the Sohu mobile front page and write the inner text of every
# <a ... class="h4" ...> link to souhu.txt, each entry followed by a
# dashed separator line.
# NOTE: urllib.urlopen is the Python 2 API; this script is written for
# Python 2, where the page body is a byte string.
url = "http://m.sohu.com/?v=3&_once_=000025_v2tov3&_smuid=ICvXXapq5EfTpQTVq6Tpz"

# Captures the inner HTML of each anchor carrying class="h4".
pattern = re.compile(r'<a.*?class="h4".*?>(.*?)</a>')
# Matches <span ...>...</span> fragments so they can be stripped from a
# captured link body.
removeSpan = re.compile('<span .*?>.*?</span>')

resp = urllib.urlopen(url)
try:
    page = resp.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    resp.close()

# Find every matching record on the page.
page_div = re.findall(pattern, page)

# The with-block guarantees the output file is flushed and closed, even
# when a write raises part-way through the loop.
with open("souhu.txt", "w") as f:
    for line in page_div:
        f.write(re.sub(removeSpan, '', line))
        f.write("\n----------------------------------------------\n")